rm(list = ls())
setwd("~/Dropbox/Historical Displacement and Ethnicity - Afghanistan/REPOSITORY")

library(ggplot2)
library(dplyr)
library(tidyr)
library(readr)

# Read in the exported Stata results
results <- read.csv("replication/data/intermediate/MISTIresults.csv")
results <- sapply(results, gsub, pattern = "^=", replacement = "")
colnames(results) <- results[1, ]
results <- results[-1, ]
results <- as.data.frame(results)
results[, 2:26] <- lapply(results[, 2:26], as.numeric)

# Mean of the residual category
means <- read.csv("replication/data/intermediate/MISTIresults_means.csv")
means <- sapply(means, gsub, pattern = "^=", replacement = "")
colnames(means) <- means[1, ]
means <- means[-1, ]
means <- as.data.frame(t(means))
means[, 2:26] <- lapply(means[, 2:26], as.numeric)

# Extract coefficients and standard errors for each group
non_pashtun_displaced_effect     <- results[results$V1 == "nonpashtun_displaced", 2:26]
non_pashtun_displaced_se         <- results[which(results$V1 == "nonpashtun_displaced") + 1, 2:26]
non_pashtun_non_displaced_effect <- results[results$V1 == "nonpashtun_nondisplaced", 2:26]
non_pashtun_non_displaced_se     <- results[which(results$V1 == "nonpashtun_nondisplaced") + 1, 2:26]
displaced_pashtun_effect         <- results[results$V1 == "displaced_pashtun", 2:26]
displaced_pashtun_se             <- results[which(results$V1 == "displaced_pashtun") + 1, 2:26]
all_pashtun_north_effect         <- results[results$V1 == "all_pashtun_north", 2:26]
all_pashtun_north_se             <- results[which(results$V1 == "all_pashtun_north") + 1, 2:26]
displaced_pashtun2_effect        <- results[results$V1 == "displaced_pashtun2", 2:26]
displaced_pashtun2_se            <- results[which(results$V1 == "displaced_pashtun2") + 1, 2:26]

# Create a dataframe with the desired groups and their effects
effects_df <- data.frame(
  group = c(
    rep("Displaced\nPashtuns", 25),
    rep("Non-Pashtuns\nin Displaced\nRegions", 25),
    rep("Non-Pashtuns\nNon-Displaced", 25),
    rep("All Pashtuns\nin the North", 25),
    rep("Displaced\nPashtuns\n(Alternative)", 25)
  ),
  effect = unlist(c(
    displaced_pashtun_effect,
    non_pashtun_displaced_effect,
    non_pashtun_non_displaced_effect,
    all_pashtun_north_effect,
    displaced_pashtun2_effect
  )),
  se = unlist(c(
    displaced_pashtun_se,
    non_pashtun_displaced_se,
    non_pashtun_non_displaced_se,
    all_pashtun_north_se,
    displaced_pashtun2_se
  )),
  dv = names(c(
    displaced_pashtun_effect,
    non_pashtun_displaced_effect,
    non_pashtun_non_displaced_effect,
    all_pashtun_north_effect,
    displaced_pashtun2_effect
  ))
)

effects_df1 <- effects_df[
  effects_df$dv %in% c(
    "family_id1", "ethnic_id1", "local_id1",
    "afghan_id1", "religion_id1"
  ) &
    !is.na(effects_df$effect),
]

effects_df1$dv <- factor(
  effects_df1$dv,
  levels = c(
    "family_id1", "ethnic_id1", "local_id1",
    "afghan_id1", "religion_id1"
  ),
  labels = c(
    paste0("Family Identity\n(mean=", round(means$family_id1, 3), ")"),
    paste0("Ethnic Identity\n(mean=", round(means$ethnic_id1, 3), ")"),
    paste0("Local Identity\n(mean=", round(means$local_id1, 3), ")"),
    paste0("Afghan Identity\n(mean=", round(means$afghan_id1, 3), ")"),
    paste0("Religion Identity\n(mean=", round(means$religion_id1, 3), ")")
  )
)

effects_df1 <- effects_df1[, c("dv", "group", "effect", "se")]
effects_df1 <- effects_df1[order(effects_df1$dv), ]

# Reordering the covariate and outcome categories
effects_df1$group <- factor(
  effects_df1$group,
  levels = c(
    "Non-Pashtuns\nNon-Displaced",
    "Non-Pashtuns\nin Displaced\nRegions",
    "Displaced\nPashtuns"
  )
)

pdf("replication/results/figures/Figure9.pdf", width = 15, height = 5)
ggplot(effects_df1, aes(x = group, y = effect)) +
  geom_segment(
    aes(
      x = group, xend = group,
      y = effect - 1.96 * se,
      yend = effect + 1.96 * se,
      linetype = "95% Confidence Interval"
    ),
    linewidth = 2
  ) +
  # geom_segment(aes(x = group, xend = group, y = effect - 1.645 * se,
  # yend = effect + 1.645 * se, linetype = "90% Confidence Interval")) +
  geom_hline(yintercept = 0, color = "red4") +
  facet_wrap(~dv, nrow = 1) +
  coord_flip() +
  xlab("") +
  ylab("") + # "Compared to the baseline group, Non-Pashtuns Non-Displaced"
  theme(
    panel.background   = element_blank(),
    panel.grid.major   = element_line(colour = "gray90", linetype = "solid"),
    axis.text.x        = element_text(size = 16),
    axis.text.y        = element_text(size = 16),
    strip.text         = element_text(size = 16),
    panel.border       = element_rect(colour = "black", fill = FALSE),
    strip.background   = element_rect(colour = "black"),
    axis.title         = element_text(size = 16),
    legend.title       = element_blank(),
    legend.text        = element_text(size = 16, margin = margin(t = 20), vjust = 5),
    legend.position    = "bottom",
    panel.spacing.x    = unit(2, "lines")
  ) +
  scale_y_continuous(labels = scales::number_format(accuracy = 0.1))
dev.off()

###########################
### Alternative Measure ###
###########################

effects_df6 <- effects_df[
  effects_df$dv %in% c(
    "family_id6", "ethnic_id6", "local_id6",
    "afghan_id6", "religion_id6"
  ) &
    !is.na(effects_df$effect),
]

effects_df6$dv <- factor(
  effects_df6$dv,
  levels = c(
    "family_id6", "ethnic_id6", "local_id6",
    "afghan_id6", "religion_id6"
  ),
  labels = c(
    paste0("Family Identity\n(mean=", round(means$family_id6, 3), ")"),
    paste0("Ethnic Identity\n(mean=", round(means$ethnic_id6, 3), ")"),
    paste0("Local Identity\n(mean=", round(means$local_id6, 3), ")"),
    paste0("Afghan Identity\n(mean=", round(means$afghan_id6, 3), ")"),
    paste0("Religion Identity\n(mean=", round(means$religion_id6, 3), ")")
  )
)

effects_df6 <- effects_df6[, c("dv", "group", "effect", "se")]
effects_df6 <- effects_df6[order(effects_df6$dv), ]

# Reordering the covariate and outcome categories
effects_df6$group <- factor(
  effects_df6$group,
  levels = c(
    "Non-Pashtuns\nNon-Displaced",
    "Non-Pashtuns\nin Displaced\nRegions",
    "Displaced\nPashtuns\n(Alternative)"
  )
)

pdf("replication/results/figures/FigureA14.pdf", width = 15, height = 5)
ggplot(effects_df6, aes(x = group, y = effect)) +
  geom_segment(
    aes(
      x = group, xend = group,
      y = effect - 1.96 * se,
      yend = effect + 1.96 * se,
      linetype = "95% Confidence Interval"
    ),
    linewidth = 2
  ) +
  # geom_segment(aes(x = group, xend = group, y = effect - 1.645 * se,
  # yend = effect + 1.645 * se, linetype = "90% Confidence Interval")) +
  geom_hline(yintercept = 0, color = "red4") +
  facet_wrap(~dv, nrow = 1) +
  coord_flip() +
  xlab("") +
  ylab("") + # "Compared to the baseline group, Non-Pashtuns Non-Displaced"
  theme(
    panel.background   = element_blank(),
    panel.grid.major   = element_line(colour = "gray90", linetype = "solid"),
    axis.text.x        = element_text(size = 16),
    axis.text.y        = element_text(size = 16),
    strip.text         = element_text(size = 16),
    panel.border       = element_rect(colour = "black", fill = FALSE),
    strip.background   = element_rect(colour = "black"),
    axis.title         = element_text(size = 16),
    legend.title       = element_blank(),
    legend.text        = element_text(size = 16, margin = margin(t = 20), vjust = 5),
    legend.position    = "bottom",
    panel.spacing.x    = unit(2, "lines")
  ) +
  scale_y_continuous(labels = scales::number_format(accuracy = 0.1))
dev.off()

###################################
### different comparison groups ###
###################################

means <- as.data.frame(
  cbind(colnames(means)[2:26], t(means[, 2:26]))
)
colnames(means) <- c("dv", "mean")
means$mean <- as.numeric(means$mean)

effects_df <- effects_df[
  !is.na(effects_df$effect) &
    (substr(effects_df$dv, nchar(effects_df$dv), nchar(effects_df$dv)) %in% c(1, 3, 4, 5)) &
    !(effects_df$group %in% c("Non-Pashtuns\nNon-Displaced", "Non-Pashtuns\nin Displaced\nRegions")),
]
effects_df <- merge(effects_df, means, by = "dv", all.x = TRUE)

# Specify the comparison group (explanatory variables)
effects_df$group[substr(effects_df$dv, nchar(effects_df$dv), nchar(effects_df$dv)) == 1] <- 
  "As presented in\nthe main manuscript"
effects_df$group[substr(effects_df$dv, nchar(effects_df$dv), nchar(effects_df$dv)) == 3] <- 
  "Displaced Pashtuns\nvs.\nPashtuns in the South"
effects_df$group[substr(effects_df$dv, nchar(effects_df$dv), nchar(effects_df$dv)) == 4] <- 
  "Pashtuns in the North\nvs.\nAll Others"
effects_df$group[substr(effects_df$dv, nchar(effects_df$dv), nchar(effects_df$dv)) == 5] <- 
  "Displaced Pashtuns\nvs.\nOthers in the North,\noutside the Displaced Region"

effects_df$group <- factor(
  effects_df$group,
  levels = c(
    "As presented in\nthe main manuscript",
    "Displaced Pashtuns\nvs.\nPashtuns in the South",
    "Pashtuns in the North\nvs.\nAll Others",
    "Displaced Pashtuns\nvs.\nOthers in the North,\noutside the Displaced Region"
  )
)

effects_df <- effects_df[order(effects_df$group, decreasing = TRUE), ]

# (dependent variables)
effects_df$dv <- ifelse(
  effects_df$dv %in% c("family_id1", "family_id3", "family_id4", "family_id5"),
  "Family Identity", effects_df$dv
)
effects_df$dv <- ifelse(
  effects_df$dv %in% c("ethnic_id1", "ethnic_id3", "ethnic_id4", "ethnic_id5"),
  "Ethnic Identity", effects_df$dv
)
effects_df$dv <- ifelse(
  effects_df$dv %in% c("local_id1", "local_id3", "local_id4", "local_id5"),
  "Local Identity", effects_df$dv
)
effects_df$dv <- ifelse(
  effects_df$dv %in% c("afghan_id1", "afghan_id3", "afghan_id4", "afghan_id5"),
  "Afghan Identity", effects_df$dv
)
effects_df$dv <- ifelse(
  effects_df$dv %in% c("religion_id1", "religion_id3", "religion_id4", "religion_id5"),
  "Religion Identity", effects_df$dv
)

effects_df$dv <- factor(
  effects_df$dv,
  levels = c(
    "Family Identity", "Ethnic Identity", "Local Identity",
    "Afghan Identity", "Religion Identity"
  )
)

pdf("replication/results/figures/FigureA10.pdf", width = 16, height = 12)
ggplot(effects_df, aes(x = 1, y = effect)) +
  geom_segment(
    aes(
      xend = 1,
      y = effect - 1.96 * se,
      yend = effect + 1.96 * se,
      linetype = "95% Confidence Interval"
    ),
    linewidth = 2
  ) +
  geom_hline(yintercept = 0, color = "red4") +
  facet_grid(group ~ dv, switch = "y") +
  coord_flip() +
  xlab("") +
  ylab("") +
  theme(
    panel.background   = element_blank(),
    panel.grid.major   = element_line(colour = "gray90", linetype = "solid"),
    axis.text.x        = element_text(size = 16),
    axis.text.y        = element_blank(),
    axis.ticks.y       = element_blank(),
    strip.text         = element_text(size = 16),
    strip.text.x       = element_text(size = 16, margin = margin(t = 20, b = 20)),
    strip.text.y.left  = element_text(size = 16, angle = 0, hjust = 0.5),
    panel.border       = element_rect(colour = "black", fill = FALSE),
    strip.background   = element_rect(colour = "black"),
    axis.title         = element_text(size = 16),
    legend.title       = element_blank(),
    legend.text        = element_text(size = 16, margin = margin(t = 20), vjust = 5),
    legend.position    = "bottom",
    panel.spacing      = unit(2, "lines")
  ) +
  scale_y_continuous(labels = scales::number_format(accuracy = 0.1)) +
  geom_text(
    data = effects_df,
    aes(x = 1, y = Inf, label = paste0("mean=", round(mean, 3))),
    hjust = 1.05, vjust = 4, size = 5, inherit.aes = FALSE
  )
dev.off()